Cleaned data

Imported and cleaned all kiln data available from 2018-2020. Involved using an algorithm to remove high peaks and valleys, detection of when the “start” of a run was based on setpoint increases and kiln temperature increases. We now have mostly clean plots with a few exceptions.

Assorted

Assorted lots from each kiln.

all_kilns <- bind_rows(
  kilns_AB %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_C  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_D  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_E  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_F  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_G  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_H  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln)
)

# random sample of LOTNOs
set.seed(505)
n_kilns <- sample_n(all_kilns, 16) %>% dplyr::select(LOTNO) %>% unlist()

n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "A"), 2) %>% dplyr::select(LOTNO) %>% unlist()
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "B"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "C"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "D"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "E"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "F"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "G"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "H"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- unlist(n_kilns)

sample_kilns <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns, lot_compare=T)

A

# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "A"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_a <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_a)

B

# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "B"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_b <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_b)

C

# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "C"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_c <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_c)

D

# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "D"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_d <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_d)

E

# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "E"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_e <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_e)

F

# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "F"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_f <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_f)

G

# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "G"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_g <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_g)

H

# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "H"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_h <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_h)

AUC Calculations

One measure mentioned to have potential importance in defect rate is variation between setpoint and average kiln temperature in the 400°C to 600°C range. An algorithm takes the absolute value of the difference between the values, and adds them together to produce a new feature, displayed below.

AUC

Base plot of temperature and setpoint over time, with green area representing the claculated area between the two curves. Numeric values also printed for comparison.

plotAucValues(sample_kilns, x.nudge = 900, y.nudge = 0)

Cropped

plotAucValues(sample_kilns, crop=T, x.nudge = 0, y.nudge = 200)

Cropped, unlabeled

plotAucValues(sample_kilns, crop=T, free.x=T)

AUC distribution

Distribution varies greatly between kilns

Boxplot

Kilns G and H have by far the most consistent operation based on our measure.

df_merged_auc %>% 
  group_by(LOTNO) %>% slice(1) %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  # ggplot(aes(x=aucDiff, y=fct_reorder(KILN,aucDiff), fill =KILN2))+
  ggplot(aes(x=aucDiff, y=fct_reorder(KILN2,aucDiff)))+
  geom_boxplot(outlier.alpha = 0,
               outlier.shape = 21)+
  geom_jitter(height = .2, alpha=.1)+
  labs(title = "Setpoint vs temperature variation between kilns")+
  xlab("Area between curves")+
  ylab("Kiln")+
  theme(legend.position = 'none')+
  scale_x_continuous(labels = scales::label_number())

Density

Most of the distributions are not distributed normally and potentially require transformation depending on the analysis performed.

df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  ggplot(aes(x=aucDiff, y = ..count../sum(..count..)))+
  geom_density()+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  facet_wrap(~KILN2, scales='free')

  # facet_wrap(~KILN2)

Impact on yields

How does our new area between the curves feature relate to yield values?

Lot yields

Starting very broadly, there are very weak correlations between the AUC feature and overall lot yields.

All

# join correlation of AUC, lot yield to original DF and plot
df <- df_yields_auc %>% 
  group_by(LOTNO, KILN, aucDiff, temp_avg, precip, snow_fall, snow_depth) %>% 
  dplyr::summarise(
    total_fired = sum(TOTAL_ITEM_FIRED),
    total_rejected = sum(TOTAL_ITEM_REJECTED),
    pct_lot_yield = (total_fired - total_rejected) / total_fired
  ) %>% 
  mutate(KILN2 = str_replace(KILN, "R", ""))

df <- df %>% 
  group_by(KILN2) %>% 
  dplyr::summarise(cor = cor(pct_lot_yield, aucDiff)) %>% 
  left_join(df) %>% 
  mutate(kiln_cor = factor(paste0(KILN2, " (", round(cor,3), ")")))

df %>% 
  ggplot(aes(x=pct_lot_yield, y=aucDiff))+
  geom_pointdensity(alpha=.8, size=1)+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor, scales='free_y')+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus entire lot yields',
       subtitle = 'Correlation value (in parentheses)')+
  theme(legend.position = 'none')

df %>% count(cor, KILN2) %>%  
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
         ) %>% 
  set_colnames(c("Correlation", "Kiln", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Kiln Observations
0.27 E 19
-0.24 A 134
-0.23 D 90
-0.18 C 55
0.13 F 129
-0.05 B 124
0.03 G 288
-0.01 H 216

Trendline

Trendline added.

df %>% 
  ggplot(aes(x=pct_lot_yield, y=aucDiff))+
  geom_pointdensity(alpha=.8, size=1)+
  geom_smooth(alpha=.1, color = 'red')+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor, scales='free_y')+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus entire lot yields',
       subtitle = 'Correlation value (in parentheses)')+
  theme(legend.position = 'none')

Cropped

Cropped the x-axis: 80 - 100%

# join correlation of AUC, lot yield to original DF and plot
df <- df_yields_auc %>% 
  group_by(LOTNO, KILN, aucDiff, temp_avg, precip, snow_fall, snow_depth) %>% 
  dplyr::summarise(
    total_fired = sum(TOTAL_ITEM_FIRED),
    total_rejected = sum(TOTAL_ITEM_REJECTED),
    pct_lot_yield = (total_fired - total_rejected) / total_fired
  ) %>% 
  mutate(KILN2 = str_replace(KILN, "R", ""))

df <- df %>% 
  group_by(KILN2) %>% 
  dplyr::summarise(cor = cor(pct_lot_yield, aucDiff)) %>% 
  left_join(df) %>% 
  mutate(kiln_cor = factor(paste0(KILN2, " (", round(cor,3), ")")))

df %>% 
  ggplot(aes(x=pct_lot_yield, y=aucDiff))+
  geom_pointdensity(alpha=.8, size=1)+
  scale_x_continuous(limits = c(0.8,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor, scales='free_y')+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus entire lot yields',
       subtitle = 'Correlation value (in parentheses)')+
  theme(legend.position = 'none')

Item yields

Above, we tested the idea that yields of an entire lot could be affected by our AUC feature. Next, we hone in on specific kilns and determine if different items might show different yields within the kiln.

A

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "A")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
  # geom_point()+
  # geom_bin2d()+
  # stat_density_2d(aes(fill=..level..))+
  # stat_bin_hex()+
  geom_pointdensity()+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  # facet_wrap(~descr_cor)+
  facet_wrap(~descr_cor, scales='free_y')+
  theme(legend.position = 'none')

# table
df %>% 
  count(cor, DESCRIPTION) %>% 
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
  ) %>% 
  set_colnames(c("Correlation", "Description", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Description Observations
0.42 10“ODX1.75”,10PPI,CB 10
-0.28 5.19“TX4.13”BX1",10PPI,CB 31
-0.17 4“X4”X1",10PPI,CB 10
0.15 6.1“TX4.7”BX1.25",10PPI,CB 27
0.14 8“ODX1.5”,10PPI,CB 11
0.12 5“ODX1.25”,10PPI,CB 17
-0.1 5.125“X5.125”X1.25",10PPI,CB 18
0.08 7.5“X7.5”X.400",65PPI,CORD 10
-0.06 2“X2”X.75",10PPI,CB 21

B

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "B")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
  # geom_point()+
  # geom_bin2d()+
  # stat_density_2d(aes(fill=..level..))+
  # stat_bin_hex()+
  geom_pointdensity()+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  # facet_wrap(~descr_cor)+
  facet_wrap(~descr_cor, scales='free_y')+
  theme(legend.position = 'none')

# table
df %>% 
  count(cor, DESCRIPTION) %>% 
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
  ) %>% 
  set_colnames(c("Correlation", "Description", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Description Observations
0.33 3“X3”X1",10PPI,CB 22
0.24 5.125“X5.125”X1.25",10PPI,CB 22
-0.23 4“X4”X1",10PPI,CB 13
0.22 5“ODX1.25”,10PPI,CB 11
-0.15 2“X2”X.75",10PPI,CB 16
0.14 5.19“TX4.13”BX1",10PPI,CB 31
0.09 6.1“TX4.7”BX1.25",10PPI,CB 27
-0.06 3“ODX1”,MO10PPI,CB 12
0.03 UDICELL 125X125X30 10PPI CB 13

C

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "C")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
  # geom_point()+
  # geom_bin2d()+
  # stat_density_2d(aes(fill=..level..))+
  # stat_bin_hex()+
  geom_pointdensity()+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  # facet_wrap(~descr_cor)+
  facet_wrap(~descr_cor, scales='free_y')+
  theme(legend.position = 'none')

# table
df %>% 
  count(cor, DESCRIPTION) %>% 
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
  ) %>% 
  set_colnames(c("Correlation", "Description", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Description Observations
-0.82 4“X4”X1"-IC,10PPI,PSZT 7
-0.75 3“X3”X1"-IC,10PPI,PSZT 7
-0.62 1.5“ODX.8”,45PPI,PSZT,RBFG 10
-0.56 UDICELL 150X150X30 10PPI PSZT 8
0.44 2.6“X2.6”X.75",10PPI,PSZT 7
0.37 4“X4”X1",10PPI,PSZT 6
-0.25 UDICELL DIA 70X25 10PPI PSZT 7
0.17 2.75“ODX1.25”IDX.625",10PPI,PSZT 8
0.14 4“ODX1”,10PPI,PSZT 9

D

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "D")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
  # geom_point()+
  # geom_bin2d()+
  # stat_density_2d(aes(fill=..level..))+
  # stat_bin_hex()+
  geom_pointdensity()+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  # facet_wrap(~descr_cor)+
  facet_wrap(~descr_cor, scales='free_y')+
  theme(legend.position = 'none')

# table
df %>% 
  count(cor, DESCRIPTION) %>% 
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
  ) %>% 
  set_colnames(c("Correlation", "Description", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Description Observations
-0.27 3“X4”X.875",15PPI,AL92,SEC 26
-0.25 SQUARE,<5PPI,AL92 19
0.25 SMALLTOMB-1",<5PPI,AL92 16
0.24 10“TODX1.5”,50APPI,ALLT,LAM 53
0.23 DWG.C039-ARC,MO20PPI,AL92 15
-0.1 SQUARE,30PPI,AL92 36
0.09 SMALLARC-1",<5PPI,AL92 18
0.04 495MMX422MMX57MM-DWG,30APPI,OBSIC 15
0.03 SQUARE,45PPI,AL92 15

E

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "E")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
  # geom_point()+
  # geom_bin2d()+
  # stat_density_2d(aes(fill=..level..))+
  # stat_bin_hex()+
  geom_pointdensity()+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  # facet_wrap(~descr_cor)+
  facet_wrap(~descr_cor, scales='free_y')+
  theme(legend.position = 'none')

# table
df %>% 
  count(cor, DESCRIPTION) %>% 
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
  ) %>% 
  set_colnames(c("Correlation", "Description", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Description Observations
-1 145MMTODX122MMBODX38MM,50APPI,ALLT,SAF 2
-1 2“X4”X.875",10PPI,AL92 2
-1 3“X4”X.875",20PPI,AL92,SEC 2
-1 3“X4”X1.75",15/20PPI,AL92,SEC 2
1 4“ODX1”,10PPI,PSZM 2
0.97 495MX422MX57M-DWG,45APPI,OBSIC 3
0.94 495MMX422MMX57MM-DWG,30APPI,OBSIC 4
0.65 75MMX75MMX22MM,20PPI,AL92 3
0.43 3“X4”X.875",15PPI,AL92,SEC 5

F

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "F")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
  # geom_point()+
  # geom_bin2d()+
  # stat_density_2d(aes(fill=..level..))+
  # stat_bin_hex()+
  geom_pointdensity()+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  # facet_wrap(~descr_cor)+
  facet_wrap(~descr_cor, scales='free_y')+
  theme(legend.position = 'none')

# table
df %>% 
  count(cor, DESCRIPTION) %>% 
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
  ) %>% 
  set_colnames(c("Correlation", "Description", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Description Observations
0.6 2.75“ODX1.25”IDX.625",10PPI,PSZT 18
0.59 3“ODX1”,15PPI,PSZT,1/8"FG 14
0.35 UDICELL 150X150X30 10PPI PSZT 19
0.25 7“ODX1.25”,MO10PPI,PSZM 19
0.24 UDICELL DIA 150X30 10PPI PSZT 15
0.1 UDICELL 125X125X30 10PPI PSZT 24
-0.09 4“ODX1”,10PPI,PSZT 21
-0.04 4“X4”X1"-MD,10PPI,PSZT 20
-0.03 3“X4”X.875",15PPI,AL92,SEC 15

G

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "G")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
  # geom_point()+
  # geom_bin2d()+
  # stat_density_2d(aes(fill=..level..))+
  # stat_bin_hex()+
  geom_pointdensity()+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  # facet_wrap(~descr_cor)+
  facet_wrap(~descr_cor, scales='free_y')+
  theme(legend.position = 'none')

# table
df %>% 
  count(cor, DESCRIPTION) %>% 
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
  ) %>% 
  set_colnames(c("Correlation", "Description", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Description Observations
-0.38 44MMODX26MMIDX15MM,45PPI,AL99,FC 62
-0.18 4.5“ODX2.5”IDX1.375",20PPI,PSZM 40
0.18 7“X7”X1.25",12.5MMC,PSZM 38
0.14 7.33“TX6”BX1.25",10MMC,PSZM 40
-0.11 10“TODX2.5”-3"STRIP,10/20PPI,PSZM,FEC 58
-0.06 2“ODX.5”,30PPI,PSZM,SEC 64
-0.02 1.5“ODX.5”,30PPI,PSZM,SEC 46
0.02 6“X6”X1.25"-SP,12.5MMC,PSZM 41
0 1.5“ODX.8”,45PPI,PSZT,RBFG 84

H

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "H")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
# df$DESCRIPTION <- gsub('[[:punct:]]', "", df$DESCRIPTION)
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=total_item_pct_yield, y=aucDiff))+
  # geom_point()+
  # geom_bin2d()+
  # stat_density_2d(aes(fill=..level..))+
  # stat_bin_hex()+
  geom_pointdensity()+
  scale_x_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  xlab('Lot yield')+
  ylab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  # facet_wrap(~descr_cor)+
  facet_wrap(~descr_cor, scales='free_y')+
  theme(legend.position = 'none')

# table
df %>% 
  count(cor, DESCRIPTION) %>% 
  arrange(-abs(cor)) %>% 
  mutate(
    cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
  ) %>% 
  set_colnames(c("Correlation", "Description", "Observations")) %>% 
  kable(format = 'html', escape = 'F') %>% 
  kable_styling('striped',full_width = F)
Correlation Description Observations
-0.35 1“ODX.5”,45PPI,AL99,SEC 22
0.3 .75“ODX.5”,45PPI,AL99,SEC 24
0.18 3“X4”X.875",15PPI,AL92,SEC 24
0.15 DWGC097-REVA-TOMB,30PPI,AL99 27
-0.14 .91“ODX.5”-C259B,30PPI,AL99,SEC,“H4” 75
-0.13 .91“ODX.5”-C259B,15PPI,AL99,SEC,“H6” 54
-0.11 150MMODX90MMIDX200MM,10PPI,PSZM,FEC 72
0.1 150MMODX90MMIDX125MM,10PPI,PSZM,FEC 39
0.03 .75“ODX.5”,30PPI,AL99,SEC 125

Impact of defect rates

CW

Does AUC effect the occurrence of cracked webs of an item, on a per lot, per kiln basis (remember we cannot compare kilns to one another at this point due to the varying value distributions between them)?

# get lot fired total
df <- df_merged_auc %>%
  # lump kilns
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  mutate(reject_count_single_row = reject_vol_single_row_D * vol_piece) %>% 
  group_by(LOTNO) %>% 
  dplyr::summarise(total_lot_count_fired = sum(total_item_count_fired_D)) %>% 
  right_join(df_merged_auc)

# get defects total
df <- df %>% 
  group_by(LOTNO, CAUSE) %>%
  dplyr::summarise(total_defect_count_per_lot = sum(total_item_count_rejected_D)) %>% 
  right_join(df) %>% 
  group_by(LOTNO, CAUSE) %>%
  slice(1) %>% ungroup() %>%
  dplyr::select(LOTNO, CAUSE, total_lot_count_fired, total_defect_count_per_lot, aucDiff) %>% 
  mutate(pct_defect = 1 - (total_lot_count_fired - total_defect_count_per_lot)  / total_lot_count_fired )
  
# fill missing values
pct_defect_by_lot <- df %>% 
  pivot_wider(id_cols     = LOTNO, 
              names_from  = CAUSE, 
              values_from = pct_defect,
              values_fill = 0)

# join pct defect to aucDiff
pct_defect_by_lot <- pct_defect_by_lot %>% 
  pivot_longer(cols = BE:BIT) %>% 
  # join to aucDiff 
  left_join(
    df_merged_auc %>% 
      mutate(KILN = str_replace(KILN, "R", "")) %>% 
      group_by(LOTNO) %>% slice(1) %>% 
      dplyr::select(LOTNO, KILN, aucDiff)
  ) %>% 
  set_colnames(c("LOTNO", "CAUSE", "defect_pct", "KILN", "aucDiff")) %>% 
  mutate_if(is.character, factor)

# for CW, what is the relationship between KILN and aucDiff?
def = "CW"

pct_defect_by_lot %>%
  dplyr::filter(CAUSE == def) %>% 
  group_by(KILN) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(CAUSE == def)) %>% 
  dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity()+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor,scales='free_x')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,.15))+
  labs(
    title = paste0(def, " defect rate per lot vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0(def, " defect rate"))

BE

def = "BE"

pct_defect_by_lot %>%
  dplyr::filter(CAUSE == def) %>% 
  group_by(KILN) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(CAUSE == def)) %>% 
  dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity()+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor,scales='free_x')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,.2))+
  labs(
    title = paste0(def, " defect rate per lot vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0(def, " defect rate"))

DC

def = "DC"

pct_defect_by_lot %>%
  dplyr::filter(CAUSE == def) %>% 
  group_by(KILN) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(CAUSE == def)) %>% 
  dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity()+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor, scales = 'free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,.1))+
  labs(
    title = paste0(def, " defect rate per lot vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0(def, " defect rate"))

Defect rates by kiln

A

# for each kiln, compare the defect rate vs AUC for each cause
kil = "A"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate per defect vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))

B

# for each kiln, compare the defect rate vs AUC for each cause
kil = "B"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate per defect vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))

C

# for each kiln, compare the defect rate vs AUC for each cause
kil = "C"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate per defect vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))

D

# for each kiln, compare the defect rate vs AUC for each cause
kil = "D"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate per defect vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))

E

# for each kiln, compare the defect rate vs AUC for each cause
kil = "E"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate per defect vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))

F

# for each kiln, compare the defect rate vs AUC for each cause
kil = "F"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate per defect vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))

G

# for each kiln, compare the defect rate vs AUC for each cause
kil = "G"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate per defect vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))

H

# for each kiln, compare the defect rate vs AUC for each cause
kil = "H"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2)+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate per defect vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))